** TAXSIM IS CURRENTLY NOT WORKING
** This file documents the process we followed at the time to obtain the data.
** The goal of this file was to create the dataset Taxsim_output,
** which contains the imputed marginal tax rates.

/*
** Data Processing 1
** This section cleans the data and prepares it for use with TAXSIM

* Load the pooled CPS extract.
use ".\data\CPS2015-2017.dta", clear

* Remember the row order of the raw file; it is used later as a sort tie-breaker.
generate sort_id = _n

* Sample restriction: records with month code 0 that also carry the ASEC flag.
* NOTE(review): assumes this extract codes the wanted records as month==0 - confirm.
keep if month==0 & asecflag==1

** Most income variables are coded with 99999.... when not available.
** For our purposes they need to be zero, so the largest value observed in
** each inc* variable is treated as that code and replaced by 0.
foreach incvar of varlist inc* {
	quietly summarize `incvar'
	display "`incvar': max value: " r(max)
	replace `incvar' = 0 if `incvar'==r(max)
}

** Household-level income: one h<name> total per income item, labelled after
** the item it aggregates.
foreach incvar of varlist inc* {
	bysort year month serial: egen h`incvar' = sum(`incvar')
	local item_label : variable label `incvar'
	label variable h`incvar' "Household `item_label'"
}

** Error flags. flag==0 means the record passed every check so far.
** Flag 1: the sum of members' total income differs from the household income
**         reported in the data (guards against measurement error).
**         These are 4012 records.
** Flag 2: households with zero or negative total income.
**         These are an additional 3729 records.
generate byte flag = 0
label define flag 0 "no errors" 1 "Hhincome != sum(inctot)" 2 "Negative total income", modify
label values flag flag

replace flag = 1 if hhincome!=. & hinctot!=hhincome
replace flag = 2 if flag==0 & hhincome!=. & hinctot<=0
** The next step is flagging couple and single households.
** This is defined by whoever declares to be the householder.
** This ignores, for example, the case where the grandfather (being the oldest) is a widower
** who is living with his child and the child's spouse. From a tax perspective,
** the child and the child's spouse may be the ones filing taxes.

** Trick: I'll recode the unmarried partner as a spouse.
** This may help later, but I will keep the code for filing separately.


	* Count unmarried partners (relate code 1114) per household before recoding them.
	by year month serial: egen fill_sep = sum(relate==1114)
	* From here on an unmarried partner is treated as a spouse (code 201).
	replace relate = 201 if relate==1114

	* Number of records with relate<=201 in the household.
	bysort year month serial: egen marriedhh = sum(relate<=201)
	label define   marriedhh 0 "error" 1 "Single" 2 "Couple"
	label values   marriedhh marriedhh
	label variable marriedhh "Married Household"

	* Flag 3: households with no such record at all.
	label define flag 3 "Households without a head", modify
	replace flag = 3 if flag==0 & marriedhh==0

	* 0 = single, 1 = couple; left missing for any other count.
	generate byte flag_couple = marriedhh - 1 if inlist(marriedhh, 1, 2)

	* Number of records coded relate==101 in the household.
	by year month serial: egen nohead = sum(relate==101)

** Income quintile by household
	* xtile() is evaluated on one record per household (the first row), within
	* year and weighted by asecwth; the result is then copied to the remaining
	* rows of the same household.
	by year month serial: generate hh_row = _n
	egen q_income = xtile(hhincome) if hh_row==1, weight(asecwth) by(year) n(5)
	drop hh_row
	by year month serial: replace q_income = q_income[1] if _n>1
	
** Flag households with other members.
** What to do with "non-nuclear households"? We exclude them because we concentrate on nuclear households.
** The model, like the tax code, assumes only 1 or 2 taxpayers.
** The model also assumes there is only 1 family in the household.
** For this, simply FLAG using famunit, and keep family 1.
	* Flag 4: record belongs to a family unit other than the first one.
	replace flag = 4 if famunit>1 & flag==0
	label define flag 4 "Secondary or tertiary family", modify


** We also need to restrict the sample to nuclear households, so households
** containing any member with a relate code above 301 are flagged.
	bysort year month serial: egen flag_nnuclear = sum(relate>301)
	replace flag = 5 if flag_nnuclear>0 & flag==0
	label define flag 5 "Non nuclear Households", modify

** There are 98,287 records with this flag, so we need to establish that we are
** working with nuclear households.
** From here on we create some demographics.
** All households are parents and children.

** Children per household (relate==301), by age band
	bysort year month serial: egen nkids05   = sum((age>=0  & age<=5)  * (relate==301))
	bysort year month serial: egen nkids613  = sum((age>=6  & age<=13) * (relate==301))
** nkids 14-17 is required for tax purposes
	bysort year month serial: egen nkids1417 = sum((age>=14 & age<=17) * (relate==301))
	bysort year month serial: egen nkids1418 = sum((age>=14 & age<=18) * (relate==301))
	bysort year month serial: egen nkids19p  = sum((age>=19 & age<=90) * (relate==301))
	* Labels state the age bands exactly as coded above.
	label var nkids05   "Number of kids 0-5"
	label var nkids613  "Number of kids 6-13"
	label var nkids1417 "Number of kids 14-17"
	label var nkids1418 "Number of kids 14-18"
	label var nkids19p  "Number of kids aged 19 or older"
** This last count gives an additional sample restriction:
** households where the children are "old".

	replace flag = 6 if nkids19p>0 & flag==0
	label define flag 6 "HH with older children", modify
** Perhaps will have drop households with older children.

** getting some variables for husband and wife (spouse/partner)
** This has become trickier with the presence of same sex households. 
** so we do a different approach:

	* 1 for records with relate<250 (head/spouse), 0 for everyone else.
	generate byte head_spouse = (relate<250)
	** Order each household so that head and spouse come first, sorted by sex
	** code and then from oldest to youngest; sort_id breaks remaining ties.
	** Rows 1 and 2 of a household are therefore the two adults used below.
	gsort year month serial -head_spouse sex -age sort_id
	generate long id_sort = _n
	by year month serial: generate pid_sort = _n

** Identify same-sex couples (kept for now); missing for single households.
	by year month serial: generate same_sex = (sex[1]==sex[2]) if flag_couple!=0

** From here forward, data for person 1 and person 2 of each household.
** Age of each, with person 2 set to 0 in single households.
	by year month serial: generate age_1 = age[1]
	by year month serial: generate age_2 = cond(flag_couple==0, 0, age[2])

	label variable age_1 "Husband's age"
	label variable age_2 "Spouse's  age"

** Education collapsed into five groups
	recode educ (0/71    = 1 "Less than HS") ///
	            (72 73   = 2 "HighSchool")   ///
	            (80/110  = 3 "Some College") ///
	            (111/122 = 4 "College")      ///
	            (123/125 = 5 "Grad")         ///
	            , generate(educl)
	label variable educl "Education level by group"

	by year month serial: generate educ_1 = educl[1]
	by year month serial: generate educ_2 = cond(flag_couple==0, 0, educl[2])

	label variable educ_1 "Husband's level by group"
	label variable educ_2 "Spouse's  level by group"
	label values educ_1 educ_2 educl

** Race group: any non-zero hispan code wins over the race code; what is
** neither race group 1 nor 2 ends up in "Other".
	generate byte race_g = cond(hispan!=0, 3,           ///
	                       cond(int(race/100)==1, 1,    ///
	                       cond(int(race/100)==2, 2, 4)))
	label define   race_g 1 White 2 Black 3 Hispanic 4 Other
	label values   race_g race_g
	label variable race_g "Group Race variable "

	by year month serial: generate race_1 = race_g[1]
	by year month serial: generate race_2 = cond(flag_couple==0, 0, race_g[2])
	label variable race_1 "Husband's Race"
	label variable race_2 "Spouse's Race"
	label values race_1 race_2 race_g

	** Aux variables * Do we need this?
	*gen agesq_1=age_1^2
	*gen agesq_2=age_2^2

	** Region and division: the source "region" variable is kept as the
	** division, and the region is its leading digit.
	rename region division
	generate region = int(division/10)
	label define region2 1 "NorthEast" 2 "MidWest" 3 "South" 4 "West"
	label values region region2
	

** Drop individuals with ANY self-employment income: NOT DONE ANYMORE. Should the changes be made here? Deferred to later.
*bysort year month serial:egen incbus_1=max(incbus*(sex==1)*(relate<250))
*bysort year month serial:egen incbus_2=max(incbus*(sex==2)*(relate<250))
*bysort year month serial:egen class_1=max(classwkr*(sex==1)*(relate<250))
*bysort year month serial:egen class_2=max(classwkr*(sex==2)*(relate<250))

////////////////////////////////////////////////////////////////////////
// Here we merge the data with information from CEX and SCF. using frames

* One frame per auxiliary source. capture suppresses the error raised when
* frame creation fails (for example because the frame already exists).
capture frame create cex
capture frame create scf

* These files were created to obtain averages of specific assets.
frame scf: use ".\data\Trump_SCF_final data_new.dta", clear
frame cex: use ".\data\data_TaxSim_CEX_Division_09_22_20.dta", clear

* Drop the median* columns from the CEX file and print summaries of both frames.
frame cex: drop median*
frame scf: summarize
frame cex: summarize

* Build the link keys.
* marriedhh is referenced by its full name: the former "ren married ..." only
* worked through variable-name abbreviation, which fails when varabbrev is off
* or when another variable starting with "married" exists.
rename marriedhh marreid_plus
generate married = 1 if marreid_plus==2
replace  married = 0 if marreid_plus==1
* division is re-expressed as consecutive group numbers; the original codes
* stay available in division_plus.
rename division division_plus
egen division = group(division_plus)
rename q_income quintile

* Many-to-one links into each auxiliary frame.
frlink m:1 married quintile, frame(scf)
frlink m:1 year division married quintile, frame(cex)

* Bring over the variables of interest.
frget capital, from(scf)
frget mean*, from(cex)

* Records without a CEX value get zeros.
foreach cexvar of varlist mean* {
	replace `cexvar' = 0 if `cexvar'==.
}
////////////////////////////////////////////////////////////////////////

	
** TAXSIM v32
** First Verify latest Version 32 is installed
* Point net at the NBER repository and (re)install taxsim32. Every step is
* wrapped in capture, so a failure here does not stop the script.
capture net from "http://www.nber.org/stata"
capture net describe taxsim32
capture net install taxsim32, replace

* Only records with relate<250 are kept from here on.
keep if relate< 250

* TAXSIM uses a slightly different state coding: consecutive numbers 1-51
* assigned in the order of the FIPS codes listed below. Any statefip value
* not in the list leaves state missing.
generate state = .
local fips_codes  1  2  4  5  6  8  9 10 11 12 13 15 16 17 18 19 20 ///
                 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 ///
                 38 39 40 41 42 44 45 46 47 48 49 50 51 53 54 55 56
local taxsim_code = 0
foreach fips of local fips_codes {
	local ++taxsim_code
	replace state = `taxsim_code' if statefip==`fips'
}

*** For later use OCC for SBT
 
	* 1 when occ2010 is one of the listed codes or falls in one of the listed
	* code ranges, 0 otherwise (including a missing occ2010).
	generate byte occ_sbt = inlist(occ2010, 350, 500, 2760, 1430)          ///
	                      | inlist(occ2010, 2100, 2140, 2150, 2720, 2700)  ///
	                      | inrange(occ2010,  800,  950)                   ///
	                      | inrange(occ2010, 1600, 1980)                   ///
	                      | inrange(occ2010, 2600, 2920)                   ///
	                      | inrange(occ2010, 3000, 3650)
	* Indicator for the first and second record of each household.
	forvalues person = 1/2 {
		by year month serial: generate occ_sbt_`person' = occ_sbt[`person']
	}
	
*** Here comes the big changes. Consult https://users.nber.org/~taxsim/taxsim32/ for details on definitions 
	* taxsimid and year will be done separately.
	* Mstatus
	* mstat: 1 for single households, 2 for couples, missing otherwise.
	generate byte mstat = flag_couple + 1 if inlist(flag_couple, 0, 1)
	** replace  mstat=6 if fill_sep   ==1 Ignored for now. May change it later
	** We ignore children with income, and simply add them to the HH income.

	* page / sage: ages of person 1 and person 2
	generate page = age_1
	generate sage = age_2

	* depx: open question - drop households with dependents, or drop older children?
	generate depx  = nkids05 + nkids613 + nkids1418
	* dep13 / dep17 / dep18: children up to 13, 17 and 18 respectively
	generate dep13 = nkids05 + nkids613
	generate dep17 = nkids05 + nkids613 + nkids1417
	generate dep18 = nkids05 + nkids613 + nkids1418

	** Not valid anymore
	*bysort year month serial:egen incearn_1=max((incbus+incwage+incfarm)*(sex==1)*(relate<250))
	*bysort year month serial:egen incearn_2=max((incbus+incwage+incfarm)*(sex==2)*(relate<250))

	* Wage, business and farm income of the first and second record of each household.
	foreach source in incwage incbus incfarm {
		by year month serial: generate `source'_1 = `source'[1]
		by year month serial: generate `source'_2 = `source'[2]
	}

	** flag when both earn the same high number 1099999

	* Single households have no second earner.
	foreach source in incwage incbus incfarm {
		replace `source'_2 = 0 if flag_couple==0
	}

	* pwages / swages
	generate pwages = incwage_1
	generate swages = incwage_2

	** dividends and intrec: totals over the household's records.
	** NOTE(review): the original remark says everyone's dividend income is
	** considered, but only records with relate<250 are still in the data at
	** this point - confirm which was intended.
	by year month serial: egen dividends = sum(incdivid)
	by year month serial: egen intrec    = sum(incint)

	* stcg ltcg: from the SCF we may get a rough estimate by income quantile.
	** For now keep the assumption that stcg and ltcg are zero.
	generate stcg = 0
	generate ltcg = 0
	*** Other property income subject to NIIT, including
	* alimony income not available for the selected years
	
	* Income of household members other than head/spouse ("children").
	* Only records with relate<250 are still in the data at this point, so the
	* former sum of inctot*(relate>250) over the remaining rows was always 0.
	* The amount is recovered as the household total computed on the full
	* household (hinctot) minus the total income of the remaining rows.
	by year month serial: egen hs_inctot = sum(inctot)
	generate hincearn_c = hinctot - hs_inctot
	drop hs_inctot
	* Income from children is excluded from the tax inputs.

	* otherprop: rent income of head/spouse
	by year month serial: egen otherprop = sum((incrent+0)*(relate<250))
	*replace otherprop=otherprop

	** Further flags. Only records with flag==0 are used later on.

	* Flag 7: either adult reports wages equal to 1099999.
	replace flag = 7 if incwage_1==1099999 | incwage_2==1099999
	label define flag 7 "high income Household", modify

	* Same quantity as hincearn_c, kept under its own name for the checks below.
	generate ahincearn_c = hincearn_c

	* Tempted to drop children with high earnings.
	* Flag 8: children's income above 50000 or negative.
	replace flag = 8 if ahincearn_c>50000
	replace flag = 8 if ahincearn_c<0
	label define flag 8 "Household with High income children", modify

	* Flag 9: negative household rent income.
	replace flag = 9 if hincrent<0
	label define flag 9 "HH with negative Rent income", modify


	** nonprop
	by year month serial: egen nonprop = sum((inceduc+incother)*(relate<250))

	* Pensions
	by year month serial: egen pensions = sum((incretir)*(relate<250))


	* SS Benefits: gross Social Security benefits
	by year month serial: egen gssi = sum((incss +incssi +incsurv+incdisab)*(relate<250))
	* UI: unemployment compensation received
	by year month serial: egen ui = sum(incunemp*(relate<250))
	** Transfers
	by year month serial: egen transfers = sum((incwelfr + incwkcom + incvet + incchild)*(relate<250))

	** Rent: for state tax rebates. Calculate it by decile. May get a different estimate from the Housing Survey.
	** For now use "means" using imputed rent values, by region and income.
	/*tempvar hinc_pc hsize mrent mrent2
	bysort  year month serial:gen `hsize'=_N
	xtile  `hinc_pc'=hhinc/`hsize', n(10)
	bysort  year `hinc_pc' region:egen `mrent'=mean(housret) if housret>0
	bysort  year `hinc_pc' region:egen `mrent2'=max(`mrent') */
	* Rent is imputed only when hhtenure==2; everyone else gets 0.
	gen     rentpaid=mean_rentpaid if hhtenure==2
	replace rentpaid=0             if rentpaid==.

	** Property tax is already calculated at the household level;
	** we just need to zero it for those without property tax.
	replace proptax=0 if proptax==.
******************************************************************************************************************************	
******************************************************************************************************************************
******************************************************************************************************************************
******************************************************************************************************************************
	** otheritem 
	** Other Itemized deductions that are a preference for the Alternative Minimum Tax. 
	** These would include Other state and local taxes Preference share of medical expenses; Miscellaneous 
	* otheritem comes straight from the merged mean_otheritem value.
	generate otheritem = mean_otheritem
	** Childcare expenses: only for households with at least one child counted in dep13.
	generate childcare = mean_childcare*(dep13>0)
	generate mortgage  = 0
	*** New items
	generate scorp = 0
	*+mean_fincbtxm
	**

	* Business vs. professional income for person 1 (prefix p) and person 2
	* (prefix s). Farm income always counts as business income; incbus is
	* added to business income when occ_sbt is 0 and becomes professional
	* income when occ_sbt is 1.
	local prefixes p s
	forvalues person = 1/2 {
		local px : word `person' of `prefixes'
		generate `px'businc  = incfarm_`person'
		replace  `px'businc  = `px'businc + incbus_`person' if occ_sbt_`person'==0
		generate `px'profinc = incbus_`person'              if occ_sbt_`person'==1
	}
	* No professional income where the condition above did not apply.
	replace sprofinc = 0 if sprofinc==.
	replace pprofinc = 0 if pprofinc==.

*** If business or professional income is negative, move it into wages and
*** set the item itself to zero.
** Otherwise, taxsim cannot use the data for computations.
	foreach px in p s {
		foreach item in businc profinc {
			replace `px'wages  = `px'wages + `px'`item' if `px'`item'<0
			replace `px'`item' = 0                       if `px'`item'<0
		}
	}
 
	* Distribution of flags over all records, then over the first record of
	* each household.
	tabulate flag
	tabulate flag if pid_sort==1
	compress

	* Intermediate file with every record, flagged or not.
	save ".\data\cps_pprocess_taxsim.dta", replace

	* Reload the intermediate file. "use" accepts clear, not replace; the
	* former ", replace" stopped the script with a syntax error.
	use ".\data\cps_pprocess_taxsim.dta", clear
	* Force scorp to zero in case the file on disk comes from an older run.
	replace scorp = 0

	* Analysis sample: records without any error flag.
	keep if flag==0

	* year_or keeps the original year; taxes are computed for the year before it.
	generate year_or = year
	replace  year    = year - 1

	** Variables for taxsim only
	keep year year_or hhincome month serial state mstat page sage       ///
	     depx dep13 dep17 dep18                                         ///
	     pwages swages dividends intrec stcg ltcg                       ///
	     hincearn_c otherprop nonprop pensions gssi ui                  ///
	     transfers rentpaid otheritem childcare mortgage                ///
	     proptax scorp pbusinc pprofinc sbusinc sprofinc
	* All remaining variables are household-level, so exact duplicates are dropped.
	duplicates drop

	* First run: taxes under the (lagged) year. Results get the f_ prefix.
	taxsim32, full replace
	foreach outvar of varlist fiitax-v42 {
		rename `outvar' f_`outvar'
	}

	* Second run: same inputs evaluated with year set to 2018. Results get
	* the t_ prefix.
	replace year = 2018
	taxsim32, full replace
	foreach outvar of varlist fiitax-v42 {
		rename `outvar' t_`outvar'
	}

	* Restore the original year and store the final output.
	replace year = year_or
	save ".\data\Taxsim_output.dta", replace
    
*/	

